

import requests
import parsel
import json
from redis import StrictRedis

def main():
    """Scrape the Baidu movie hot board and store the top entries in Redis.

    Downloads the board page, extracts titles, links and hot-index values
    with CSS selectors, combines them into dicts with the keys ``name``,
    ``link`` and ``number`` (at most ten entries), and passes the list to
    ``save_redis`` together with a Redis client for ``localhost:6379``.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with an HTTP error status.
        RuntimeError: if no entries could be extracted from the page; Redis
            is left untouched in that case.
    """
    url = 'https://top.baidu.com/board?tab=movie&sa=fyb_movie&sa=search_31065'
    # Send a browser-like User-Agent so the request is not identified as a
    # script.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'}
    # Bound the wait so a stalled connection cannot hang the script, and fail
    # loudly on an HTTP error instead of parsing an error page.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    body = r.content.decode('utf-8')
    sel = parsel.Selector(body)
    name = sel.css('.c-single-text-ellipsis::text').getall()
    link = sel.css('.title_dIF3B::attr(href)').getall()
    number = sel.css('.hot-index_1Bl1a::text').getall()
    # Only every third title match (indices 0, 3, ..., 27) is used as an
    # entry name -- presumably the selector yields three text nodes per
    # entry; TODO confirm against the page markup. zip() stops at the
    # shortest list, so a page with fewer than ten entries produces a
    # shorter result instead of an IndexError.
    data_list = [
        {'name': title, 'link': href, 'number': hot_index}
        for title, href, hot_index in zip(name[:30:3], link[:10], number[:10])
    ]
    if not data_list:
        # save_redis() deletes the stored list before writing, so stop here
        # rather than replace good data with nothing.
        raise RuntimeError('no hot-board entries could be extracted from the page')
    redis = StrictRedis(host='localhost', port=6379, decode_responses=True)
    save_redis(redis, data_list)
def save_redis(redis, data_list):
    """Replace the Redis list ``film_hot`` with JSON-encoded entries.

    The existing key is deleted first; then each item of ``data_list`` is
    serialized to JSON, printed, and appended to the end of the list.

    Args:
        redis: client object providing ``delete(key)`` and
            ``rpush(key, value)``.
        data_list: iterable of JSON-serializable objects, stored in order.
    """
    key = 'film_hot'
    redis.delete(key)
    # ensure_ascii=False keeps non-ASCII text readable instead of \u-escaped.
    # The generator is lazy, so each item is dumped, printed and pushed
    # before the next one is serialized.
    encoded = (json.dumps(item, ensure_ascii=False) for item in data_list)
    for payload in encoded:
        print(payload)
        redis.rpush(key, payload)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()